Other plots on this website:
Also on this website:
import io
import json
import requests
import datetime as dt
import numpy as np
import plotly.graph_objects as go
import plotly.offline as pyo
from matplotlib.dates import date2num, num2date
from scipy.optimize import curve_fit
from IPython.display import Markdown, display, Math
from scipy import stats as sps
from scipy.interpolate import interp1d
import pandas as pd
pyo.init_notebook_mode()
# Download the per-country COVID-19 time series (JSON) published on GitHub.
json_countries = "https://raw.githubusercontent.com/maxdevblock/covid-19-time-series/master/json/COVID-COUNTRIES.json"
with requests.get(json_countries) as req:
    # utf-8-sig strips a possible BOM before JSON parsing
    data = json.loads(req.content.decode('utf-8-sig'))
# Credible-mass levels used for the highest-density intervals.
define_p = [.95, .5]
# We create an array for every possible value of Rt
R_T_MAX = 12
r_t_range = np.linspace(0, R_T_MAX, R_T_MAX*100+1)
# Gamma is 1/serial interval
# https://wwwnc.cdc.gov/eid/article/26/7/20-0282_article
# https://www.nejm.org/doi/full/10.1056/NEJMoa2001316
GAMMA = 1/7.5
def get_df(_data, country):
    """Build a date-indexed Series of cumulative confirmed cases for `country`.

    Parameters
    ----------
    _data : dict
        The downloaded JSON mapping country -> {"Confirmed": {date_str: count}}.
    country : str
        Key into `_data`.

    Returns
    -------
    pd.Series
        Cumulative confirmed counts, named 'cases', indexed by parsed dates,
        sorted ascending.
    """
    # Assemble a small CSV in memory; the header column is 'data' (Italian
    # for "date") because the rest of the notebook uses that column name.
    # join() instead of repeated += avoids quadratic string building.
    rows = (f"{day},{count}" for day, count in _data[country]["Confirmed"].items())
    csv = "data,cases\n" + "\n".join(rows)
    # .squeeze("columns") replaces the read_csv(squeeze=True) keyword, which
    # was deprecated in pandas 1.4 and removed in pandas 2.0.
    series = pd.read_csv(io.StringIO(csv),
                         usecols=['data', 'cases'],
                         parse_dates=['data'],
                         index_col=['data']).squeeze("columns").sort_index()
    return series
def prepare_cases(cases, cutoff=1, std=2):
    """Convert cumulative counts into daily increments plus a smoothed series.

    Daily increments are clipped at zero (downward data corrections happen),
    smoothed with a centered 7-day Gaussian window, and both series are
    truncated to start where the smoothed curve first reaches `cutoff`.

    Returns (original_daily, smoothed) as aligned Series.
    """
    daily = cases.diff()
    # Negative differences are reporting corrections; treat them as zero.
    daily[daily < 0] = 0
    window = daily.rolling(7,
                           win_type='gaussian',
                           min_periods=1,
                           center=True)
    smooth = window.mean(std=std).round()
    smooth[smooth < 0] = 0
    # Drop the leading stretch before the outbreak reaches `cutoff` cases/day.
    start = np.searchsorted(smooth, cutoff)
    smooth = smooth.iloc[start:]
    return daily.loc[smooth.index], smooth
def highest_density_interval(pmf, p=.9):
    """Return the narrowest index interval containing probability mass > p.

    For a DataFrame, computes one interval per column and stacks the results
    as rows. If no pair of grid points spans mass > p, the threshold is
    relaxed in steps of .05 until one does.
    """
    # If we pass a DataFrame, just call this recursively on the columns
    if isinstance(pmf, pd.DataFrame):
        per_column = [highest_density_interval(pmf[col], p=p) for col in pmf]
        return pd.DataFrame(per_column, index=pmf.columns)
    cdf = np.cumsum(pmf.values)
    # mass[i, j] = probability between grid positions i (exclusive) and j
    mass = cdf - cdf[:, None]
    # Find all (low, high) pairs exceeding the threshold, relaxing it by .05
    # per attempt until at least one pair qualifies.
    threshold = p
    lo_idx, hi_idx = (mass > threshold).nonzero()
    while lo_idx.size == 0 or hi_idx.size == 0:
        threshold -= .05
        lo_idx, hi_idx = (mass > threshold).nonzero()
    # The narrowest qualifying interval is the highest-density one.
    k = np.argmin(hi_idx - lo_idx)
    return pd.Series([pmf.index[lo_idx[k]], pmf.index[hi_idx[k]]],
                     index=[f'Low_{p*100:.0f}',
                            f'High_{p*100:.0f}'])
def HDI_of_grid(probMassVec, credMass=0.95):
    """Grid-based highest-density interval (Kruschke-style waterline method).

    Sorts the probability masses in descending order, finds the smallest
    "height" such that all cells at or above it cover at least `credMass`,
    and returns the indexes, total mass, and height of that cell set.
    """
    flat_desc = np.sort(probMassVec, axis=None)[::-1]
    # First position where the cumulative (descending) mass reaches credMass.
    cut = np.min(np.where(np.cumsum(flat_desc) >= credMass))
    height = flat_desc[cut]
    in_hdi = probMassVec >= height
    return {'indexes': np.where(in_hdi)[0],
            'mass': np.sum(probMassVec[in_hdi]),
            'height': height}
def HDI_of_grid_from_df(pmf, p):
    """Wrap HDI_of_grid, mapping grid positions back to the pmf's index labels.

    Returns a Series with keys Low_<p*100>/High_<p*100>; for a DataFrame,
    one such row per column.
    """
    # If we pass a DataFrame, just call this recursively on the columns
    if isinstance(pmf, pd.DataFrame):
        per_column = [HDI_of_grid_from_df(pmf[col], p=p) for col in pmf]
        return pd.DataFrame(per_column, index=pmf.columns)
    hdi = HDI_of_grid(pmf, p)
    # The HDI cell set may be non-contiguous; report its outer bounds.
    first = hdi["indexes"][0]
    last = hdi["indexes"][-1]
    return pd.Series([pmf.index[first], pmf.index[last]],
                     index=[f'Low_{p*100:.0f}',
                            f'High_{p*100:.0f}'])
def HDIs(pmf, P=(.95, .5)):
    """Compute HDI bound Series for each credible mass in `P`.

    The default was previously a mutable list literal; a tuple avoids the
    shared-mutable-default pitfall while accepting exactly the same calls
    (lists passed by callers still work).

    Returns a list of results from HDI_of_grid_from_df, one per level in P.
    """
    return [HDI_of_grid_from_df(pmf, p=p) for p in P]
def get_posteriors(sr, sigma=0.15):
    """Estimate the daily posterior over Rt for a smoothed new-cases Series.

    Uses the module-level `r_t_range` grid and `GAMMA` (1/serial interval).
    `sigma` is the standard deviation of the Gaussian process matrix that
    lets Rt drift from one day to the next.

    Returns
    -------
    (posteriors, log_likelihood)
        posteriors : DataFrame indexed by r_t_range, one column per date.
        log_likelihood : float, running sum of log P(k) over all updates.
    """
    # (1) Calculate Lambda: expected new cases for each candidate Rt,
    # given yesterday's count (Bettencourt & Ribeiro relation).
    lam = sr[:-1].values * np.exp(GAMMA * (r_t_range[:, None] - 1))
    # (2) Calculate each day's likelihood: P(observed k | Rt)
    likelihoods = pd.DataFrame(
        data = sps.poisson.pmf(sr[1:].values, lam),
        index = r_t_range,
        columns = sr.index[1:])
    # (3) Create the Gaussian Matrix (day-to-day Rt transition kernel)
    process_matrix = sps.norm(loc=r_t_range,
                              scale=sigma
                              ).pdf(r_t_range[:, None])
    # (3a) Normalize all rows to sum to 1
    process_matrix /= process_matrix.sum(axis=0)
    # (4) Calculate the initial prior: Gamma(4) over the Rt grid
    prior0 = sps.gamma(a=4).pdf(r_t_range)
    prior0 /= prior0.sum()
    # Create a DataFrame that will hold our posteriors for each day
    # Insert our prior as the first posterior.
    posteriors = pd.DataFrame(
        index=r_t_range,
        columns=sr.index,
        data={sr.index[0]: prior0}
    )
    # We said we'd keep track of the sum of the log of the probability
    # of the data for maximum likelihood calculation.
    log_likelihood = 0.0
    # (5) Iteratively apply Bayes' rule, one day at a time
    for previous_day, current_day in zip(sr.index[:-1], sr.index[1:]):
        # (5a) Calculate the new prior: diffuse yesterday's posterior
        current_prior = process_matrix @ posteriors[previous_day]
        # (5b) Calculate the numerator of Bayes' Rule: P(k|R_t)P(R_t)
        numerator = likelihoods[current_day] * current_prior
        # (5c) Calculate the denominator of Bayes' Rule P(k)
        denominator = np.sum(numerator)
        # Execute full Bayes' Rule
        posteriors[current_day] = numerator/denominator
        # Add to the running sum of log likelihoods
        log_likelihood += np.log(denominator)
    return posteriors, log_likelihood
def plotly_rt(result, name):
    """Render the most-likely Rt trajectory with 50% and 95% credible bands.

    `result` must have columns 'ML', 'Low_95', 'High_95', 'Low_50', 'High_50'
    with a date index level named 'data'. `name` labels the plot title.
    Displays the figure inline via plotly offline; returns None.
    """
    index = result['ML'].index.get_level_values('data')
    values = result['ML'].values
    # Aesthetically, extrapolate credible interval by 1 day either side
    lofn = interp1d(date2num(index),
                    result['Low_95'].values,
                    bounds_error=False,
                    fill_value='extrapolate')
    hifn = interp1d(date2num(index),
                    result['High_95'].values,
                    bounds_error=False,
                    fill_value='extrapolate')
    # Aesthetically, extrapolate credible interval by 1 day either side
    lofn50 = interp1d(date2num(index),
                      result['Low_50'].values,
                      bounds_error=False,
                      fill_value='extrapolate')
    hifn50 = interp1d(date2num(index),
                      result['High_50'].values,
                      bounds_error=False,
                      fill_value='extrapolate')
    # Evaluation grid (one extra day) and x-axis range (padded both sides).
    extended = pd.date_range(start=index[0], end=index[-1]+pd.Timedelta(days=1))
    hyperextended = pd.date_range(start=index[0]-pd.Timedelta(days=1), end=index[-1]+pd.Timedelta(days=2))
    fig = go.Figure()
    # Horizontal reference line at Rt == 1 (epidemic growth threshold).
    fig.add_trace(
        go.Scatter(
            x=[index[0]-pd.Timedelta(days=10), index[-1]+pd.Timedelta(days=10)], y=[1, 1],
            line = dict(color='black', width=1), showlegend=False,
        )
    )
    # 50% credible band (darker shading; hi trace fills down to lo trace).
    fig.add_trace(
        go.Scatter(
            x=index, y=lofn50(date2num(extended)),
            line_color="rgba(0,0,0,.25)", showlegend=False,
            name="lo50 R<sub>t</sub>"
        )
    )
    fig.add_trace(
        go.Scatter(
            x=index, y=hifn50(date2num(extended)),
            line_color="rgba(0,0,0,.25)", showlegend=False,
            fill="tonexty", fillcolor="rgba(0,0,0,.1)",
            name="hi50 R<sub>t</sub>"
        )
    )
    # 95% credible band (lighter shading).
    fig.add_trace(
        go.Scatter(
            x=index, y=lofn(date2num(extended)),
            line_color="rgba(0,0,0,.1)", showlegend=False,
            name="lo95 R<sub>t</sub>"
        )
    )
    fig.add_trace(
        go.Scatter(
            x=index, y=hifn(date2num(extended)),
            line_color="rgba(0,0,0,.1)", showlegend=False,
            fill="tonexty", fillcolor="rgba(0,0,0,.1)",
            name="hi95 R<sub>t</sub>"
        )
    )
    # ML estimate: markers colored from green (low) through white to red (high).
    fig.add_trace(
        go.Scatter(
            x=index, y=values,
            marker=dict(
                size=7,
                line=dict(width=1, color="black"),
                color=values,
                cmin=0,
                cmax=max(values),
                colorbar=dict(
                    title="R<sub>t</sub>"
                ),
                # White is pinned at value 1 by placing it at 1/max(values).
                colorscale=[
                    [0., "rgba(0,150,0,1)"],
                    [1./max(values), "rgba(255, 255, 255, 1)"],
                    [1., "rgba(255,0,0,1)"],
                ]
            ),
            mode="markers+lines", showlegend=False,
            line = dict(color='grey', width=2, dash='dot'),
            name="R<sub>t</sub>",
        )
    )
    fig.update_layout(legend_orientation="h",
                      showlegend=True, plot_bgcolor='rgba(0,0,0,0)',
                      yaxis={"gridcolor": '#bdbdbd', "zerolinecolor": '#969696', "range":[0, 5]},
                      xaxis={"gridcolor": '#bdbdbd', "range":[hyperextended[0], hyperextended[-1]]},
                      title={"text": "Real time {} R<sub>t</sub>".format(name), "xanchor": "center", "x": 0.5},
                      yaxis_title="R<sub>t</sub>", hovermode="x unified"
                      )
    pyo.iplot(fig)
# Report the temporal coverage of the dataset, using the US series as the
# reference (all countries share the same date keys).
print("FIRST ENTRY DATE: {}".format(
    list( data["US"]["Confirmed"].keys() )[0]
    )
)
print("LAST ENTRY DATE: {}".format(
    list( data["US"]["Confirmed"].keys() )[-1]
    )
)
# Number of days between the first and last entries.
period = (
    dt.datetime.strptime(list(data["US"]["Confirmed"].keys())[-1], "%Y-%m-%d") -
    dt.datetime.strptime(list(data["US"]["Confirmed"].keys())[0], "%Y-%m-%d")
).days
print("COVERAGE: {} days".format(period))
print("CURRENT DATE IS: {}".format(dt.datetime.now().strftime("%Y-%m-%d %H:%M:%S")))
def get_best_countries(json_data=None, number=25):
    """Return the `number` countries with the highest latest confirmed totals.

    Parameters
    ----------
    json_data : dict
        Mapping country -> {"Confirmed": {date_str: count, ...}, ...}.
    number : int
        How many top countries to keep.

    Returns
    -------
    list[str]
        The selected country names, sorted alphabetically.
    """
    def latest_confirmed(country):
        # Dates are ISO "YYYY-MM-DD" strings, so lexicographic max() gives
        # the latest date in O(k) instead of sorting every key list.
        confirmed = json_data[country]["Confirmed"]
        return confirmed[max(confirmed)]
    # Rank directly by the key function; no intermediate dict rebuild needed.
    # sorted() is stable, so ties keep insertion order exactly as before.
    top = sorted(json_data, key=latest_confirmed, reverse=True)[:number]
    return sorted(top)
# Select the countries to analyse and build the shared plotting arrays.
best_countries = get_best_countries(json_data=data)
print(best_countries)
x = [] # datetime x array
_x = [] # integer x array
yC = {} # new confirmed cases array
yD = {} # new deaths array
yR = {} # new recovered array
yP = {} # new infected array
TOTyC = {} # confirmed cases array
TOTyD = {} # deaths array
TOTyR = {} # recovered array
TOTyP = {} # infected array
# Shared date axis, taken from the first country's series (all countries are
# assumed to share the same date keys — TODO confirm against the JSON source).
for day in data[best_countries[0]]["Confirmed"]:
    # x values
    x.append(dt.datetime.strptime(day, "%Y-%m-%d"))
    _x.append(len(x) - 1)
# Per-country cumulative totals (TOTy*) and daily increments (y*).
for country in best_countries:
    yC.update({country: np.array([])})
    yD.update({country: np.array([])})
    yR.update({country: np.array([])})
    yP.update({country: np.array([])})
    TOTyC.update({country: []})
    TOTyD.update({country: []})
    TOTyR.update({country: []})
    TOTyP.update({country: []})
    for i, day in enumerate(data[country]["Confirmed"]):
        if i:
            # dy values: today's totals minus yesterday's appended totals
            yC[country] = np.append(yC[country], data[country]["Confirmed"][day] - TOTyC[country][-1])
            yD[country] = np.append(yD[country], data[country]["Deaths"][day] - TOTyD[country][-1])
            yR[country] = np.append(yR[country], data[country]["Recovered"][day] - TOTyR[country][-1])
            yP[country] = np.append(yP[country],
                                    (data[country]["Confirmed"][day] - data[country]["Deaths"][day] - data[country]["Recovered"][day]) -
                                    TOTyP[country][-1]
                                    )
        # y TOT values (appended every day, including day 0)
        TOTyC[country].append(data[country]["Confirmed"][day])
        TOTyD[country].append(data[country]["Deaths"][day])
        TOTyR[country].append(data[country]["Recovered"][day])
        TOTyP[country].append(data[country]["Confirmed"][day] - data[country]["Deaths"][day] - data[country]["Recovered"][day])
# Markdown navigation menu with an in-page anchor per country.
menu = "#### COUNTRIES:\n"
for country in best_countries:
    menu += "- [{}](#{})\n".format(country, country.replace(" ", "-"))
display(Markdown(menu))
# Google mobility report: the live CSV download is commented out in favour of
# a locally cached pickle of the same data.
#google = pd.read_csv(
#    "https://www.gstatic.com/covid19/mobility/Global_Mobility_Report.csv",
#    parse_dates=['date'], dtype={"sub_region_1": str, "sub_region_2": str},
#    index_col=["date"]
#)
google = pd.read_pickle("google-mobility.pkl")
_R0 = {}  # reserved for per-country R0 values; the JSON dump below is commented out
for country in best_countries:
    display(Markdown("## {}".format(country)))
    # --- Overview: stacked bars (infected/recovered/deaths) + cumulative total line ---
    fig = go.Figure()
    fig.add_trace(
        go.Bar(
            name="Infected", x=x, y=TOTyP[country], marker_color="cyan"
        )
    )
    fig.add_trace(
        go.Bar(
            name="Recovered", x=x, y=TOTyR[country], marker_color="lime"
        )
    )
    fig.add_trace(
        go.Bar(
            name="Deaths", x=x, y=TOTyD[country], marker_color="red"
        )
    )
    fig.add_trace(
        go.Scatter(
            name="Total", x=x, y=TOTyC[country], marker_color="blue", line_shape='spline',
        )
    )
    fig.update_layout(
        showlegend=True, plot_bgcolor='rgba(0,0,0,0)',
        xaxis={"gridcolor": '#bdbdbd'},
        title={"text": "{} overview".format(country), "xanchor": "center", "x": 0.5},
        hovermode="x unified", barmode='stack'
    )
    fig.update_yaxes(title_text="number", gridcolor='#bdbdbd')
    pyo.iplot(fig)
    # --- Daily increases (x[1:] because increments only exist from day 2 on) ---
    fig = go.Figure()
    # confirmed
    fig.add_trace(go.Scatter(
        x=x[1:], y=yC[country],
        mode='lines+markers',
        marker_color="blue", marker_size=5, marker_symbol="circle", marker_line_width=1,
        line_shape='spline',
        legendgroup="cases", name="new cases/day"
    ))
    # deaths
    fig.add_trace(go.Scatter(
        x=x[1:], y=yD[country],
        mode='lines+markers',
        marker_color="red", marker_size=5, marker_symbol="diamond", marker_line_width=1,
        line_shape='spline',
        legendgroup="deaths", name="new deaths/day"
    ))
    # recovered
    fig.add_trace(
        go.Scatter(
            x=x[1:], y=yR[country],
            mode='lines+markers',
            marker_color="lime", marker_size=5, marker_symbol="square", marker_line_width=1,
            line_shape='spline',
            legendgroup="recovered", name="new recovered/day"
        ))
    # infected
    fig.add_trace(go.Scatter(
        x=x[1:], y=yP[country],
        mode='lines+markers',
        marker_color="lightskyblue", marker_size=5, marker_symbol="square",
        line_shape='spline',
        legendgroup="infected", name="new infected/day"
    ))
    fig.update_layout(legend_orientation="h",
                      showlegend=True, plot_bgcolor='rgba(0,0,0,0)',
                      yaxis={"gridcolor": '#bdbdbd', "zerolinecolor": '#969696'},
                      xaxis={"gridcolor": '#bdbdbd'},
                      title={"text": "{}, daily increase".format(country), "xanchor": "center", "x": 0.5},
                      yaxis_title="new/day",
                      )
    pyo.iplot(fig)
    # --- Rt estimation; failures (e.g. too little data) are reported, not fatal ---
    try:
        df = get_df(data, country)
        original, smoothed = prepare_cases(df, std=2.5)
        # Note that we're fixing sigma to a value just for the example
        posteriors, log_likelihood = get_posteriors(smoothed, sigma=.20)
        # Note that this takes a while to execute - it's not the most efficient algorithm
        HDIS = HDIs(posteriors)
        #hdis = highest_density_interval(posteriors, p=.9)
        most_likely = posteriors.idxmax().rename('ML')
        result = most_likely
        for hdis in HDIS:
            result = pd.concat([result, hdis], axis=1)
        # Look into why you shift -1
        #result = pd.concat([most_likely, hdis], axis=1)
        plotly_rt(result, country)
    except Exception as err:
        print(f"ERROR with Ro: {err}")
    # Google mobility labels the US differently from the case-data source.
    if country == "US":
        country = "United States"
    # Keep country-level rows only (sub-region column is NaN -> "NONE").
    _df = google.loc[google["country_region"] == country]
    _df = _df.loc[_df.fillna("NONE")["sub_region_1"] == "NONE"]
    fig = go.Figure()
    # Columns after the first four are assumed to be the mobility metrics
    # (percent change from baseline) — TODO confirm against the report schema.
    for column in _df.columns[4:]:
        fig.add_trace(
            go.Scatter(
                x=_df.index,
                y=_df[column],
                name=column.replace("_", " ").title().split(" Percent")[0]
            )
        )
    fig.update_layout(
        legend_orientation="h",
        showlegend=True, plot_bgcolor='rgba(0,0,0,0)',
        yaxis={"gridcolor": '#bdbdbd', "zerolinecolor": '#969696'},
        xaxis={"gridcolor": '#bdbdbd'},
        title={"text": f"{country} mobility (last update {_df.index[-1].date() if _df.index.size else 'None'})", "xanchor": "center", "x": 0.5},
        yaxis_title="Percentage Change from Baseline", hovermode="x unified"
    )
    pyo.iplot(fig)
    # Release per-country frames before the next iteration.
    df = None
    _df = None
    display(Markdown("---"))
# with open("R0_countries.json", "w") as f:
# json.dump(_R0, f)
Exported from countries/single.ipynb committed by Max Pierini on Mon Aug 31 10:59:12 2020 revision 1, 8eb7ef8